{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "autoscroll": false,
    "collapsed": false,
    "ein.tags": [
     "worksheet-0"
    ]
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import sys\n",
    "from os import getcwd, path\n",
    "\n",
    "# Put the parent of the current working directory on sys.path so that the\n",
    "# project-local utils package imported by later cells can be resolved\n",
    "# (assumes the kernel is started in this notebook's directory - confirm).\n",
    "sys.path.append(path.dirname(getcwd()))\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sb\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "autoscroll": false,
    "collapsed": false,
    "ein.tags": [
     "worksheet-0"
    ]
   },
   "outputs": [],
   "source": [
    "# Project-local data helpers; relies on the sys.path tweak made in the first cell.\n",
    "from utils import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inner join with tcr: 29 to 116 rows\n",
      "inner join with tcr: 29 to 116 rows\n",
      "{'dataframe_hash': 687006114396611113,\n",
      " 'provenance_file_summary': {u'cohorts': u'0.4.0+3.gda968fb',\n",
      "                             u'isovar': u'0.0.6',\n",
      "                             u'mhctools': u'0.3.0',\n",
      "                             u'numpy': u'1.11.1',\n",
      "                             u'pandas': u'0.18.1',\n",
      "                             u'pyensembl': u'1.0.3',\n",
      "                             u'scipy': u'0.18.1',\n",
      "                             u'topiary': u'0.1.0',\n",
      "                             u'varcode': u'0.5.10'}}\n"
     ]
    }
   ],
   "source": [
    "# Build the cohort joined with the tcr table, passing an empty patient\n",
    "# exclusion set and not restricting to patients that have BAMs.\n",
    "cohort = data.init_cohort(\n",
    "    join_with=\"tcr\",\n",
    "    exclude_patient_ids=set(),\n",
    "    only_patients_with_bams=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inner join with tcr: 29 to 116 rows\n"
     ]
    }
   ],
   "source": [
    "# Materialize the tcr-joined cohort as a DataFrame (the logged inner join\n",
    "# goes from 29 to 116 rows).\n",
    "df_tcr = cohort.as_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Partition the rows by their Sample Type value: PBMC rows are treated as\n",
    "# blood, Tumor rows as tumor.\n",
    "df_blood = df_tcr.loc[df_tcr[\"Sample Type\"] == \"PBMC\"]\n",
    "df_tumor = df_tcr.loc[df_tcr[\"Sample Type\"] == \"Tumor\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "92"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of blood (PBMC) rows feeding the summaries below.\n",
    "len(df_blood.loc[:, \"Productive Unique TCRs (cnt)\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from utils.paper import range_formatter, hyper_label_printer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{blood_num_tcr:141,255 (range 43,052-335,089)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the blood_num_tcr label: productive TCR template counts over the\n",
    "# blood (PBMC) rows, with values rounded to integers.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_blood[\"Productive TCR Templates(cnt)\"],\n",
    "    label=\"blood_num_tcr\",\n",
    "    round_to_int=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{blood_unique_tcr:82,636 (range 24,095-207,860)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the blood_unique_tcr label: productive unique TCR counts over the\n",
    "# blood (PBMC) rows, with values rounded to integers.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_blood[\"Productive Unique TCRs (cnt)\"],\n",
    "    label=\"blood_unique_tcr\",\n",
    "    round_to_int=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{tumor_num_tcr:1,402 (range 63-133,167)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the tumor_num_tcr label: productive TCR template counts over the\n",
    "# tumor rows, with values rounded to integers.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_tumor[\"Productive TCR Templates(cnt)\"],\n",
    "    label=\"tumor_num_tcr\",\n",
    "    round_to_int=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{tumor_unique_tcr:1,086 (range 67-56,273)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the tumor_unique_tcr label: productive unique TCR counts over the\n",
    "# tumor rows, with values rounded to integers.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_tumor[\"Productive Unique TCRs (cnt)\"],\n",
    "    label=\"tumor_unique_tcr\",\n",
    "    round_to_int=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{blood_tcr_clonality:0.080 (range 0.014-0.37)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the blood_tcr_clonality label from the Clonality column of the\n",
    "# blood (PBMC) rows; no integer rounding is requested here.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_blood[\"Clonality\"],\n",
    "    label=\"blood_tcr_clonality\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{blood_tcell_fraction:0.31 (range 0.082-0.64)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the blood_tcell_fraction label. The column is scaled by 1/100 first\n",
    "# (presumably blood values are stored as percentages - confirm); the tumor\n",
    "# counterpart of this cell applies no such scaling.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_blood[\"T-cell fraction\"] / 100.0,\n",
    "    label=\"blood_tcell_fraction\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{tumor_tcr_clonality:0.096 (range 0.033-0.34)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the tumor_tcr_clonality label from the Clonality column of the\n",
    "# tumor rows; no integer rounding is requested here.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_tumor[\"Clonality\"],\n",
    "    label=\"tumor_tcr_clonality\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{{{tumor_tcell_fraction:0.097 (range 0.0098-0.33)}}}\n"
     ]
    }
   ],
   "source": [
    "# Print the tumor_tcell_fraction label from the tumor rows.\n",
    "# NOTE(review): unlike blood_tcell_fraction, the series is NOT divided by\n",
    "# 100.0 here. The recorded output (0.097, range 0.0098-0.33) suggests the\n",
    "# tumor values are already fractions - confirm the units in the source data.\n",
    "hyper_label_printer(\n",
    "    range_formatter,\n",
    "    series=df_tumor[\"T-cell fraction\"],\n",
    "    label=\"tumor_tcell_fraction\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    " "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  },
  "name": "TCR Clonality.ipynb"
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
